{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OK\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import sys\n",
    "import glob\n",
    "import shutil\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from ipywidgets import interact\n",
    "from copy import deepcopy\n",
    "import matplotlib.pyplot as plt\n",
    "print('OK')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "sys.path.append('../..')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from lung_cancer.dataset import FilesIndex\n",
    "from lung_cancer import CTImagesMaskedBatch as CTIMB"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Global index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "DIR_LUNA = '/notebooks/data/MRT/luna/s*/*.mhd'\n",
    "DIR_DUMP = '/notebooks/Koryagin/Batch_dev/dir_for_dumps/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "if os.path.exists(DIR_DUMP):\n",
    "    shutil.rmtree(DIR_DUMP)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ind = FilesIndex(path=DIR_LUNA, no_ext=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create batch by subindexing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "batch_masked = CTIMB(ind.create_subset(ind.index[[714, 297, 299, 672]]))\n",
    "batch_masked_2 = CTIMB(ind.create_subset(ind.index[[714, 297, 299, 672]]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Test actions"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* check load"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<preprocessing.ct_masked_batch.CTImagesMaskedBatch at 0x7f9564d4fa90>"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "batch_masked.load(fmt='raw')\n",
    "batch_masked_2.load(fmt='raw')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "batch_copy = deepcopy(batch_masked)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* load info about nodules in batches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>seriesuid</th>\n",
       "      <th>coordX</th>\n",
       "      <th>coordY</th>\n",
       "      <th>coordZ</th>\n",
       "      <th>diameter_mm</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222...</td>\n",
       "      <td>-128.699421</td>\n",
       "      <td>-175.319272</td>\n",
       "      <td>-298.387506</td>\n",
       "      <td>5.651471</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222...</td>\n",
       "      <td>103.783651</td>\n",
       "      <td>-211.925149</td>\n",
       "      <td>-227.121250</td>\n",
       "      <td>4.224708</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                           seriesuid      coordX      coordY  \\\n",
       "0  1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222... -128.699421 -175.319272   \n",
       "1  1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222...  103.783651 -211.925149   \n",
       "\n",
       "       coordZ  diameter_mm  \n",
       "0 -298.387506     5.651471  \n",
       "1 -227.121250     4.224708  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nodules = pd.read_csv('/notebooks/data/MRT/luna/CSVFILES/annotations.csv')\n",
    "nodules.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "batch_masked = batch_masked.fetch_nodules_info(nodules=nodules)\n",
    "batch_masked_2 = batch_masked_2.fetch_nodules_info(nodules=nodules)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* create mask in one of two batches before resize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<preprocessing.ct_masked_batch.CTImagesMaskedBatch at 0x7f9564dc0f60>"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "batch_masked.create_mask()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* check resize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "batch_masked = batch_masked.resize(shape=(128, 256, 256), n_workers=6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "batch_masked_2 = batch_masked_2.resize(shape=(128, 256, 256), n_workers=6)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* create mask in the second batch after resize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "batch_masked_2 = batch_masked_2.create_mask()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['1.3.6.1.4.1.14519.5.2.1.6279.6001.219428004988664846407984058588',\n",
       "       '1.3.6.1.4.1.14519.5.2.1.6279.6001.280072876841890439628529365478',\n",
       "       '1.3.6.1.4.1.14519.5.2.1.6279.6001.187108608022306504546286626125',\n",
       "       '1.3.6.1.4.1.14519.5.2.1.6279.6001.161855583909753609742728521805'],\n",
       "      dtype='<U101')"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "batch_masked_2.indices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['1.3.6.1.4.1.14519.5.2.1.6279.6001.219428004988664846407984058588',\n",
       "       '1.3.6.1.4.1.14519.5.2.1.6279.6001.280072876841890439628529365478',\n",
       "       '1.3.6.1.4.1.14519.5.2.1.6279.6001.187108608022306504546286626125',\n",
       "       '1.3.6.1.4.1.14519.5.2.1.6279.6001.161855583909753609742728521805'],\n",
       "      dtype='<U101')"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "batch_masked.indices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<preprocessing.ct_masked_batch.CTImagesMaskedBatch at 0x7f9564de3630>"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "batch_copy.fetch_nodules_info(nodules)\n",
    "batch_copy.create_mask()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Compare"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def plot_arr_slices(height, *arrays, clim=(-1200, 300)):\n",
    "    fig, axes = plt.subplots(1, len(arrays), figsize=(14, len(arrays)*8))\n",
    "    \n",
    "    for arr, i in zip(arrays, range(len(arrays))):\n",
    "        depth = arr.shape[0]\n",
    "        n_slice = int(depth * height)\n",
    "        \n",
    "        kwargs = dict()\n",
    "        if np.max(arr) - np.min(arr) > 2.0:\n",
    "            kwargs.update(clim=clim)\n",
    "        else:\n",
    "            kwargs.update(clim=(0, 1))\n",
    "    \n",
    "        axes[i].imshow(arr[n_slice], cmap=plt.cm.gray, **kwargs)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "n_pat = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ind_npat = batch_masked.indices[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fa7361a51875408aae3e3e72645b7d4d"
      }
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "<function __main__.<lambda>>"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "interact(lambda height: plot_arr_slices(height, \n",
    "                                        batch_masked.get(ind_npat, 'images'),\n",
    "                                        batch_masked_2.get(ind_npat, 'masks'),\n",
    "                                        batch_copy.get(ind_npat, 'masks')),\n",
    "         height=(0.01, 0.99, 0.01))\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Sample nodules from batches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "nods_batch = batch_masked.sample_nodules(batch_size=10, nodule_size=(32, 64, 64), share=0.7,\n",
    "                                         variance=[100, 400, 400])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "selector_events.py[LINE:53]#DEBUG    [2017-07-11 14:14:10,158]  Using selector: EpollSelector\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<preprocessing.ct_masked_batch.CTImagesMaskedBatch at 0x7f953d818160>"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nods_batch.dump(dst=DIR_DUMP)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<preprocessing.ct_masked_batch.CTImagesMaskedBatch at 0x7f9564c98f28>"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nods_from_dump = CTIMB(FilesIndex(path=DIR_DUMP + '/*', dirs=True))\n",
    "nods_from_dump.load(fmt='blosc')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "nnod = 6\n",
    "ix_nnod = nods_batch.indices[nnod]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "232a89814a2f4e0787ac338277c12b10"
      }
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "<function __main__.<lambda>>"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "interact(lambda height: plot_arr_slices(height, \n",
    "                                        nods_batch.get(ix_nnod, 'images'),\n",
    "                                        nods_batch.get(ix_nnod, 'masks'),\n",
    "                                        nods_from_dump.get(ix_nnod, 'masks')),\n",
    "                                        \n",
    "         height=(0.01, 0.99, 0.01))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6+"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
